Package com.apps.services

Source Code of com.apps.services.UBCCourseSpiderService

/*
* AUTHOR: Kevin Lam
*/

package com.apps.services;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import com.apps.datastore.dao.BookInformationObject;
import com.apps.datastore.dao.CourseInformationObject;
import com.apps.datastore.dao.DepartmentInformationObject;
import com.apps.datastore.dao.SectionInformationObject;
import com.apps.utils.*;
import com.google.appengine.api.datastore.Entity;

public class UBCCourseSpiderService {
 
  public UBCCourseSpiderService(){
  }

  public List<BookInformationObject> getBooks(
      SectionInformationObject sio) {
    List<BookInformationObject> biol = new ArrayList<BookInformationObject>();
    try {
      BufferedReader br = WrapperUtils
          .getURLContentReader(WrapperUtils.COURSE_INFO_URL
              .replace("<DEPARTMENT>", sio.getDepartmentId())
              .replace("<COURSE>", sio.getCourseId())
              .replace("<SECTION>", sio.getSectionId()));
      String line;

      line = br.readLine();

      while (line != null) {
        if (line.contains("<b>Book Summary</b>")) {
          if (line.contains("<b>ISBN &nbsp;&nbsp; </b></td></tr><tr>")) {

            String books = line.substring(line
                .indexOf("<td class='section"));
            while (books.contains("<td class='section")) {
              String title = "";
              String required = "";
              String author = "";
              String isbn = "";
              for (int i = 0; i < 4; i++) {
                String s = books
                    .substring(
                        books.indexOf("<td class='section") + 22,
                        books.indexOf("</td>"));

                switch (i) {
                case 0:
                  title = s;
                  break;
                case 1:
                  required = s;
                  break;
                case 2:
                  author = s;
                  break;
                case 3:
                  isbn = s;
                  break;
                }
                books = books
                    .substring(books.indexOf("</td>") + 5);
              }
              biol.add(new BookInformationObject(sio
                  .getDepartmentId(), sio.getCourseId(), sio
                  .getSectionId(), title, required, author,
                  isbn));
            }
          }
        }
        line = br.readLine();
      }
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    return biol;
  }

  public List<SectionInformationObject> getSections(
      CourseInformationObject cio) {
    List<SectionInformationObject> siol = new ArrayList<SectionInformationObject>();
    try {
      BufferedReader br = WrapperUtils
          .getURLContentReader(WrapperUtils.SECTIONS_URL.replace(
              "<DEPARTMENT>", cio.getDepartmentId()).replace(
              "<COURSE>", cio.getCourseId()));
      String line = br.readLine();
      String id = "";
      String activity = "";
      String term = "";
      String day = "";
      String location = "";
      String start = "";
      String end = "";
      String instructor = "";
      String building = "";
      String room = "";
      boolean cdf = false;
      String dropNoWDate = "";
      String dropWDate = "";
      while(line!=null&&!line.contains(" <!-- TABLE FOR AREA 1-->")){ //Keep looping 
        line=br.readLine();}
      while(line!=null&&!line.contains("<B>")){
        line=br.readLine();}
      location=WrapperUtils.stringInBwtn("0\">", "</a>", line);
//      System.out.println("location: "+location);
 
      while (line!=null&&!line.contains("<!-- end of TABLE FOR AREA 1 -->")) {
        if (line.contains("<b>This course is eligible for Credit/D/Fail grading.</b>")) {
          cdf = true;
        } else if (line.contains("<tr class=section1>")
            || line.contains("<tr class=section2>")) {
          line = br.readLine();
          while (line!=null&&!line.contains("</td>")) {
            if (line.contains("<a href=")) {
              id = WrapperUtils.stringInBwtn(
                  "&dept=" + cio.getDepartmentId()
                      + "&course=" + cio.getCourseId()
                      + "&section=", "\"", line);
            }
            line = br.readLine();
          }
          line = br.readLine();

          while (line!=null&&!line.contains("<td nowrap>")) {
            line = br.readLine();
          }
          activity = WrapperUtils.stringInBwtn("<td nowrap>",
              "</td>", line);
//          System.out.println("Activity " + activity);
          line = br.readLine();
          term = WrapperUtils.stringInBwtn("<td nowrap>", "</td>",
              line);
//          System.out.println("term " + term);
          line = br.readLine();
          day = WrapperUtils.stringInBwtn("<td nowrap>", "</td>",
              line);
          day = WrapperUtils.trimAll(day);
//          System.out.println("day " + day);
          line = br.readLine();
          start = WrapperUtils.stringInBwtn("<td nowrap>", "</td>",
              line);
//          System.out.println("start " + start);
          line = br.readLine();
          end = WrapperUtils.stringInBwtn("<td nowrap>", "</td>",
              line);
//          System.out.println("end " + end);
          line = br.readLine();

          BufferedReader br2 = WrapperUtils
              .getURLContentReader(WrapperUtils.COURSE_INFO_URL
                  .replace("<DEPARTMENT>",
                      cio.getDepartmentId())
                  .replace("<COURSE>", cio.getCourseId())
                  .replace("<SECTION>", id));
          String line2 = br2.readLine();
          while (line2!=null&&!line2.contains("<!-- end of TABLE FOR AREA 1 -->")) {
            if (line2
                .contains("Last day to withdraw without a W standing : "))
              dropNoWDate = WrapperUtils.stringInBwtn("<b>",
                  "</b>", line2);
            if (line2
                .contains("Last day to withdraw with a W standing"))
              dropWDate = WrapperUtils.stringInBwtn("<b>",
                  "</b>", line2);

            if (line2.contains("<td nowrap>Instructor")) {
              while (line!=null&&!line2.contains("</table>")) {
                if (line2.contains("<td nowrap><a href=")) {
                  if (instructor != "")
                    instructor += " / "
                        + WrapperUtils.stringInBwtn(
                            "\">", "</a></td>",
                            line2);
                  else
                    instructor = WrapperUtils.stringInBwtn(
                        "\">", "</a></td>", line2);
                }
                line2 = br2.readLine();
              }
            }

            if (line2!=null&&line2.contains("<td nowrap><b>Building</b></td>")) {
              line2 = br2.readLine();
              while (line2!=null&&line2.contains("<td nowrap><b>")) {
                line2 = br2.readLine();
              } // Loop to end of <td nowrap><b>

              // Now loop to <td nowrap>
              while (line!=null&&!line2.contains("<td nowrap>")) {
                line2 = br2.readLine();
              }

              line2 = br2.readLine();
              line2 = br2.readLine();
              line2 = br2.readLine();
              line2 = br2.readLine();

              // Difference between the Building Title and
              // Building Name is

              building = WrapperUtils.stringInBwtn("<td nowrap>",
                  "</td>", line2);
              building = WrapperUtils.trimAll(building);
//              System.out.println("building " + building);
              line2 = br.readLine();
              if (building.equals("WAIT"))
                building = "";
              while (line2!=null&&!line2.contains("<td nowrap>")) {
                line2 = br2.readLine();
              }
              line2 = br2.readLine();
              StringBuilder stringBuilder = new StringBuilder();

              while (line2!=null && !line2.contains("</td>")) {
                line2=br2.readLine();
                if(line2 != null) {
                  line2=line2.trim();
               
                stringBuilder.append(line2);
                }
   
              }
           
              String l_room=stringBuilder.toString();
              l_room=l_room.trim();
           

              if (l_room.contains("</td>")) {
                room = "";
              } else if (l_room.contains("<a href")) {
                //System.out.println("BOOYOO");
                room = WrapperUtils.stringInBwtn("\">", "</a>",
                    l_room);
              } else {
                room =l_room;
              }

              // System.out.println(line2);
              // if(line2.contains("<a href")){
              // room =
              // WrapperUtils.stringInBwtn("\">","</a>",line2);
              // }else
              // if(building.contains("No Scheduled Meeting")||building.equals("")
              // ||building.contains("To Be Announced")){
              // room="";
              // }else if (room.contains("</td>")){
              // room="";
              // }
              // else{
              // room=line2;
              // }
              room = WrapperUtils.trimAll(room);
//              System.out.println("room " + room);
              if (room.equals("LIST"))
                room = "";
            }
            line2 = br2.readLine();
          }
          // System.out.println(id + ", " + activity + ", " + term +
          // ", " + day + ", " + location + ", " + start + ", " + end
          // + ", " + instructor + ", " + building + ", " + room +
          // ", " + cdf + ", " + dropNoWDate + ", " + dropWDate);
          SectionInformationObject sio = new SectionInformationObject(
              cio.getDepartmentId(), cio.getCourseId(), id,
              activity, term, day, location, start, end,
              instructor, building, room, cdf, dropNoWDate,
              dropWDate);
          if (id != "" && id != null)
            siol.add(sio);
          id = "";
          activity = "";
          term = "";
          day = "";
          location = "";
          start = "";
          end = "";
          instructor = "";
          building = "";
          room = "";
          cdf = false;
          dropNoWDate = "";
          dropWDate = "";

        }

        line = br.readLine();
      }
    } catch (MalformedURLException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    return siol;
  }

  public List<CourseInformationObject> getCourses(
      DepartmentInformationObject dio) {
    List<CourseInformationObject> ciol = new ArrayList<CourseInformationObject>();
    try {
      BufferedReader br = WrapperUtils
          .getURLContentReader(WrapperUtils.COURSES_URL.replace(
              "<DEPARTMENT>", dio.getDepartmentId()));
      String line = br.readLine();
      String id = "";
      String title = "";
      String prereq = "";
      String coreq = "";
      long credits = 0;
      int counter = 0;

      while (line != null) {
        if (line.contains("<tr class=section1>")
            || line.contains("<tr class=section2>")) {
          while (counter < 2) {
            line = br.readLine();
            if (line!=null&&line.contains("<a href=")) {
              id = WrapperUtils.stringInBwtn(
                  "\">" + dio.getDepartmentId() + " ",
                  "</a>", line);
              counter++;
            } else if (line!=null&&line.contains("<td nowrap>")) {

              title = WrapperUtils.stringInBwtn("<td nowrap>",
                  "</td>", line);
              counter++;

            }
          }

          BufferedReader br2 = WrapperUtils
              .getURLContentReader(WrapperUtils.SECTIONS_URL
                  .replace("<DEPARTMENT>",
                      dio.getDepartmentId()).replace(
                      "<COURSE>", id));
          String line2 = br2.readLine();
          while (line2!=null&&!line2.contains("<!-- end of TABLE FOR AREA 1 -->")) {
            if (line2.contains("Pre-reqs:")) {
              prereq = WrapperUtils.stringInBwtn("Pre-reqs:",
                  "<br><br>", line2);
              prereq = WrapperUtils.trimAll(prereq);
              prereq = WrapperUtils.removeHTML(prereq);
            } else if (line2.contains("Co-reqs:")) {
              coreq = WrapperUtils.stringInBwtn("Pre-reqs:",
                  "<br><br>", line2);
              coreq = WrapperUtils.trimAll(coreq);
              coreq = WrapperUtils.removeHTML(coreq);
            } else if (line2.contains("Credits:")) {
              String parseNum = line2.replaceAll("[^0-9]", "");
              if (parseNum.equals( "") && parseNum==null)
                credits = Long.parseLong(parseNum);
              // credits
              // =Long.parseLong(line.substring(line.indexOf("Credits: ")+("Credits: ".length())));
              // Long.parseLong(WrapperUtils.stringInBwtn("Credits: ","\n",
              // line2));
            }
            line2 = br2.readLine();
          }
          CourseInformationObject cio = new CourseInformationObject(
              dio.getDepartmentId(), id, title, prereq, coreq,
              credits);
          if (id != "" && id != null)
            ciol.add(cio);
          id = "";
          title = "";
          prereq = "";
          coreq = "";
          credits = 0;
          counter = 0;
        }
        line = br.readLine();
      }

    } catch (MalformedURLException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }

    return ciol;
  }

  public List<DepartmentInformationObject> getDepartments() {
    List<DepartmentInformationObject> diol = new ArrayList<DepartmentInformationObject>();
    try {
      BufferedReader br = WrapperUtils
          .getURLContentReader(WrapperUtils.DEPARTMENTS_URL);
      String line;
      String code = "";
      String title = "";
      String faculty = "";
      int counter = 0;
      line = br.readLine();

      while (line != null) {

        // System.out.println(line);
        if (line.contains("<tr class=section1>")
            || line.contains("<tr class=section2>")) {
          while (counter < 2) {
            if (line.contains("<a href=")) {
              code = WrapperUtils.stringInBwtn("\">", "</a>",
                  line);
            }
            if (line.contains("<td nowrap>")) {

              switch (counter) {
              case 0:

                title = WrapperUtils.stringInBwtn(
                    "<td nowrap>", "</td>", line);
                ++counter;

                // System.out.println("The course is "+course);
                line = br.readLine();
                // Add to database.
                break;

              case 1:
                faculty = WrapperUtils.stringInBwtn(
                    "<td nowrap>", "</td>", line);
                ++counter;
                // System.out.println("The faculty is "+faculty);
                // Add to database.
                break;

              }

            } else {
              line = br.readLine();
            }

          }

          DepartmentInformationObject dio = new DepartmentInformationObject(
              title, code, faculty);
          if (code != "" && code != null)
            diol.add(dio);
          code = "";
          title = "";
          faculty = "";
          counter = 0;
        }
        line = br.readLine();
      }
    } catch (MalformedURLException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    return diol;

  }


}
TOP

Related Classes of com.apps.services.UBCCourseSpiderService

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.